#define __STDC_FORMAT_MACROS
#define PY_SSIZE_T_CLEAN
#include <Python.h>

#include <algorithm>
#include <array>
#include <cinttypes>
#include <cstdio>
#include <stdexcept>
#include <unordered_map>

#include "compat.h"
#include "hooks.h"
#include "logging.h"
#include "record_reader.h"
#include "records.h"
#include "source.h"

namespace memray::api {

using namespace tracking_api;
using namespace io;

namespace {  // unnamed

// Maps an allocator enumerator to the name of the function it stands for.
// Yields nullptr when `allocator` holds a value that matches no case label.
const char*
allocatorName(hooks::Allocator allocator)
{
    const char* name = nullptr;

    // No `default` label on purpose: every enumerator is listed explicitly.
    switch (allocator) {
        case hooks::Allocator::MALLOC:
            name = "malloc";
            break;
        case hooks::Allocator::FREE:
            name = "free";
            break;
        case hooks::Allocator::CALLOC:
            name = "calloc";
            break;
        case hooks::Allocator::REALLOC:
            name = "realloc";
            break;
        case hooks::Allocator::POSIX_MEMALIGN:
            name = "posix_memalign";
            break;
        case hooks::Allocator::ALIGNED_ALLOC:
            name = "aligned_alloc";
            break;
        case hooks::Allocator::MEMALIGN:
            name = "memalign";
            break;
        case hooks::Allocator::VALLOC:
            name = "valloc";
            break;
        case hooks::Allocator::PVALLOC:
            name = "pvalloc";
            break;
        case hooks::Allocator::MMAP:
            name = "mmap";
            break;
        case hooks::Allocator::MUNMAP:
            name = "munmap";
            break;
        case hooks::Allocator::PYMALLOC_MALLOC:
            name = "pymalloc_malloc";
            break;
        case hooks::Allocator::PYMALLOC_CALLOC:
            name = "pymalloc_calloc";
            break;
        case hooks::Allocator::PYMALLOC_REALLOC:
            name = "pymalloc_realloc";
            break;
        case hooks::Allocator::PYMALLOC_FREE:
            name = "pymalloc_free";
            break;
    }

    return name;
}

}  // unnamed namespace

// Fills `header` from the start of the input.
//
// Throws std::ios_base::failure when the magic bytes cannot be read or do not
// match MAGIC, when the version differs from CURRENT_HEADER_VERSION, or when
// any of the remaining header fields cannot be read.
void
RecordReader::readHeader(HeaderRecord& header)
{
    // Reads sizeof(field) raw bytes from the input directly into `field`.
    const auto readField = [this](auto& field) -> bool {
        return d_input->read(reinterpret_cast<char*>(&field), sizeof(field));
    };

    const bool magic_read = d_input->read(header.magic, sizeof(MAGIC));
    if (!magic_read || memcmp(header.magic, MAGIC, sizeof(MAGIC)) != 0) {
        throw std::ios_base::failure(
                "The provided input file does not look like a binary generated by memray.");
    }

    if (!readField(header.version)) {
        throw std::ios_base::failure("Failed to read input file header.");
    }
    if (header.version != CURRENT_HEADER_VERSION) {
        throw std::ios_base::failure(
                "The provided input file is incompatible with this version of memray.");
    }

    header.command_line.reserve(4096);

    // The fields are read in this exact order; the command line is the only
    // one stored as a NUL-terminated string rather than as raw bytes.
    const bool fields_read = readField(header.python_version)
                             && readField(header.native_traces)
                             && readField(header.file_format)
                             && readField(header.stats)
                             && d_input->getline(header.command_line, '\0')
                             && readField(header.pid)
                             && readField(header.main_tid)
                             && readField(header.skipped_frames_on_main_tid)
                             && readField(header.python_allocator)
                             && readField(header.trace_python_allocators)
                             && readField(header.track_object_lifetimes);
    if (!fields_read) {
        throw std::ios_base::failure("Failed to read input file header.");
    }
}

// Decodes one variable-length unsigned integer from the input into *val.
//
// Each byte contributes its low 7 bits, least-significant group first; a set
// high bit (0x80) means another byte follows. Returns false when a byte cannot
// be read, or when the continuation bit is still set once the shift would
// reach 64 bits. *val is zeroed up front, so after a failure it holds whatever
// had been accumulated so far.
bool
RecordReader::readVarint(uint64_t* val)
{
    *val = 0;

    for (int shift = 0; shift < 64; shift += 7) {
        unsigned char byte;
        if (!d_input->read(reinterpret_cast<char*>(&byte), sizeof(byte))) {
            return false;
        }

        const uint64_t payload = byte & 0x7f;
        *val |= payload << shift;

        const bool has_continuation = (byte & 0x80) != 0;
        if (!has_continuation) {
            return true;
        }
    }

    // Ran out of bits while the encoding still asked for more bytes.
    return false;
}

// Reads a varint and undoes its zigzag encoding into *val.
// Returns false (leaving *val untouched) when the underlying varint read fails.
bool
RecordReader::readSignedVarint(int64_t* val)
{
    uint64_t encoded;
    if (!readVarint(&encoded)) {
        return false;
    }

    // The lowest bit carries the sign and the remaining bits the magnitude.
    // The mask is all-ones when the sign bit is set and zero otherwise
    // (unsigned subtraction wraps), so the XOR flips every bit only for
    // negative values.
    const uint64_t magnitude = encoded >> 1;
    const uint64_t sign_mask = uint64_t{0} - (encoded & 1);
    *val = static_cast<int64_t>(magnitude ^ sign_mask);
    return true;
}

// Takes ownership of `source` and immediately reads the file header from it;
// readHeader() throws std::ios_base::failure if the header is unusable.
//
// `track_stacks` and `track_object_lifetimes` are stored as given and consulted
// by the record-processing methods.
RecordReader::RecordReader(
        std::unique_ptr<Source> source,
        bool track_stacks,
        bool track_object_lifetimes)
: d_input(std::move(source))
, d_track_stacks(track_stacks)
, d_track_object_lifetimes(track_object_lifetimes)
{
    readHeader(d_header);

    // Pre-size the containers we expect to fill while reading.
    d_thread_names.reserve(16);

    // Native frames are only stored when stacks are tracked, and only worth
    // pre-allocating for when the header says native traces were recorded.
    const bool expect_native_frames = d_track_stacks && d_header.native_traces;
    if (expect_native_frames) {
        d_native_frames.reserve(2048);
    }
}

// Closes the reader by forwarding to the underlying input source's close().
void
RecordReader::close() noexcept
{
    d_input->close();
}

// Reports whether the underlying input source says it is still open.
bool
RecordReader::isOpen() const noexcept
{
    return d_input->is_open();
}

// Parses the body of a FRAME_PUSH record into `record`.
//
// Bit 0 of `flags` becomes the frame's entry-frame marker; the code object id
// (varint) and instruction offset (signed varint) are then read from the input.
// Returns false if either read fails.
bool
RecordReader::parseFramePush(FramePush* record, unsigned int flags)
{
    auto& frame = record->frame;
    frame.is_entry_frame = flags & 1;

    if (!readVarint(&frame.code_object_id)) {
        return false;
    }
    return readSignedVarint(&frame.instruction_offset);
}

bool
RecordReader::processFramePush(const FramePush& record)
{
    if (!d_track_stacks) {
        return true;
    }
    if (!d_curr_thread_stack) {
        throw std::runtime_error("invalid capture file: FRAME_PUSH with no previous CONTEXT_SWITCH");
    }
    auto& stack = *d_curr_thread_stack;
    FrameTree::index_t current_stack_id = stack.empty() ? 0 : stack.back();
    FrameTree::index_t new_stack_id;
    {
        std::unique_lock<std::mutex> lock(d_mutex);
        auto frame_id = d_python_frame_registry.registerRecord(record.frame).first;
        new_stack_id = d_tree.getTraceIndex(current_stack_id, frame_id);
    }
    stack.push_back(new_stack_id);
    return true;
}

// Parses a FRAME_POP record. Nothing is read from the input: the number of
// frames to pop is derived from the flag bits as `flags + 1`.
// Always returns true.
bool
RecordReader::parseFramePop(FramePop* record, unsigned int flags)
{
    record->count = flags + 1;
    return true;
}

// Applies a FRAME_POP record by removing `record.count` entries from the top of
// the current thread's stack.
//
// A no-op (returning true) when stacks are not tracked.
// Throws std::runtime_error if no CONTEXT_SWITCH has selected a stack yet, or
// if the record asks to pop more frames than the stack currently holds. The
// latter used to be guarded only by an assert, which let a malformed capture
// file pop from an empty stack (undefined behavior) in builds without asserts.
bool
RecordReader::processFramePop(const FramePop& record)
{
    if (!d_track_stacks) {
        return true;
    }
    if (!d_curr_thread_stack) {
        throw std::runtime_error("invalid capture file: FRAME_POP with no previous CONTEXT_SWITCH");
    }

    auto& stack = *d_curr_thread_stack;
    auto count = record.count;
    if (static_cast<size_t>(count) > stack.size()) {
        throw std::runtime_error("invalid capture file: FRAME_POP for more frames than were pushed");
    }

    while (count) {
        --count;
        stack.pop_back();
    }
    return true;
}

// Parses a CODE_OBJECT record into `pycode_val`.
//
// Reads, in order: the code object id (varint), the NUL-terminated function
// name and filename, the first line number (delta-encoded against the last one
// seen), the line table size (varint), and finally that many raw line table
// bytes. Returns false as soon as any read fails.
bool
RecordReader::parseCodeObjectRecord(tracking_api::pycode_map_val_t* pycode_val)
{
    auto& code = pycode_val->second;

    if (!readVarint(&pycode_val->first)) {
        return false;
    }
    if (!d_input->getline(code.function_name, '\0')) {
        return false;
    }
    if (!d_input->getline(code.filename, '\0')) {
        return false;
    }
    if (!readIntegralDelta(&d_last.code_firstlineno, &code.firstlineno)) {
        return false;
    }

    size_t linetable_size = 0;
    if (!readVarint(&linetable_size)) {
        return false;
    }

    // Size the buffer first, then read the raw bytes straight into it.
    code.linetable.resize(linetable_size);
    return d_input->read(const_cast<char*>(code.linetable.data()), linetable_size);
}

// Stores a parsed code object under its id in d_code_object_map, replacing any
// previous entry for that id. Skipped entirely when stacks are not tracked.
// Always returns true.
bool
RecordReader::processCodeObjectRecord(const tracking_api::pycode_map_val_t& pycode_val)
{
    if (d_track_stacks) {
        std::lock_guard<std::mutex> guard(d_mutex);
        d_code_object_map[pycode_val.first] = pycode_val.second;
    }
    return true;
}

// Parses a native frame index record into `frame`: the instruction pointer and
// then the frame index, each delta-encoded against the last value seen.
// Returns false if either read fails.
bool
RecordReader::parseNativeFrameIndex(UnresolvedNativeFrame* frame)
{
    if (!readIntegralDelta(&d_last.instruction_pointer, &frame->ip)) {
        return false;
    }
    if (!readIntegralDelta(&d_last.native_frame_id, &frame->index)) {
        return false;
    }
    return true;
}

// Appends a parsed native frame to d_native_frames under d_mutex.
// Skipped entirely when stacks are not tracked. Always returns true.
bool
RecordReader::processNativeFrameIndex(const UnresolvedNativeFrame& frame)
{
    if (d_track_stacks) {
        std::lock_guard<std::mutex> guard(d_mutex);
        d_native_frames.emplace_back(frame);
    }
    return true;
}

// Parses the body of an ALLOCATION record into `record`.
//
// Layout of `flags`:
//   bits 0-2: allocator id, or 0 when the allocator follows as raw bytes
//   bits 3-6: index into the recent-address cache, or 0x0f for a cache miss
//
// On a cache miss the address is read delta-encoded, shifted left by 3, and
// inserted at the front of the cache. The native frame id is only present when
// the header says native traces were recorded and the allocator is not a simple
// deallocator; the size is only present when the allocator is not a simple
// deallocator. Missing values are set to 0.
//
// Returns false as soon as any read fails.
bool
RecordReader::parseAllocationRecord(AllocationRecord* record, unsigned int flags)
{
    unsigned int pointer_cache_index = (flags >> 3) & 0x0f;
    if (pointer_cache_index == 0x0f) {
        // Cache miss, read the pointer, then update the cache
        if (!readIntegralDelta(&d_last.data_pointer, &record->address)) {
            return false;
        }

        record->address <<= 3;

        // Shift every cached address one slot towards the back, dropping the
        // last one. The destination overlaps the source and starts inside it,
        // which std::move does not permit; std::move_backward is the algorithm
        // specified for this direction of overlap.
        std::move_backward(
                d_recent_addresses.begin(),
                d_recent_addresses.end() - 1,
                d_recent_addresses.end());
        d_recent_addresses[0] = record->address;
    } else {
        // Cache hit, reuse previous pointer
        record->address = d_recent_addresses[pointer_cache_index];
    }

    auto allocator_id = flags & 7;
    if (allocator_id) {
        record->allocator = static_cast<hooks::Allocator>(allocator_id);
    } else {
        // Id 0 means the allocator did not fit in the flags and follows raw.
        if (!d_input->read(reinterpret_cast<char*>(&record->allocator), sizeof(record->allocator))) {
            return false;
        }
    }

    if (d_header.native_traces
        && hooks::allocatorKind(record->allocator) != hooks::AllocatorKind::SIMPLE_DEALLOCATOR)
    {
        if (!readIntegralDelta(&d_last.native_frame_id, &record->native_frame_id)) {
            return false;
        }
    } else {
        record->native_frame_id = 0;
    }

    if (hooks::allocatorKind(record->allocator) == hooks::AllocatorKind::SIMPLE_DEALLOCATOR) {
        record->size = 0;
    } else if (!readVarint(&record->size)) {
        return false;
    }

    return true;
}

// Publishes a parsed allocation record as d_latest_allocation.
//
// The thread id comes from the most recent context switch. Stack information
// (native frame id, Python frame index, native segment generation) is only
// filled in when stacks are tracked and the allocator is not a deallocator;
// otherwise those fields are zeroed. n_allocations is always set to 1.
// Throws std::runtime_error if no CONTEXT_SWITCH has selected a stack yet.
bool
RecordReader::processAllocationRecord(const AllocationRecord& record)
{
    if (!d_curr_thread_stack) {
        throw std::runtime_error("invalid capture file: ALLOCATION with no previous CONTEXT_SWITCH");
    }

    auto& latest = d_latest_allocation;
    latest.tid = d_last.thread_id;
    latest.address = record.address;
    latest.size = record.size;
    latest.allocator = record.allocator;

    const bool wants_stack = d_track_stacks && !hooks::isDeallocator(record.allocator);
    if (!wants_stack) {
        latest.native_frame_id = 0;
        latest.frame_index = 0;
        latest.native_segment_generation = 0;
    } else {
        latest.native_frame_id = record.native_frame_id;
        auto& frames = *d_curr_thread_stack;
        latest.frame_index = frames.empty() ? 0 : frames.back();
        latest.native_segment_generation = d_symbol_resolver.currentSegmentGeneration();
    }

    latest.n_allocations = 1;
    return true;
}

// Parses a MEMORY_MAP_START record. Reads nothing and always returns true.
bool
RecordReader::parseMemoryMapStart()
{
    // Currently nothing to do (this record type has no body)
    return true;
}

// Handles a MEMORY_MAP_START record by clearing the symbol resolver's segments
// while holding d_mutex. Always returns true.
bool
RecordReader::processMemoryMapStart()
{
    {
        std::lock_guard<std::mutex> guard(d_mutex);
        d_symbol_resolver.clearSegments();
    }
    return true;
}

// Parses a SEGMENT_HEADER record: a NUL-terminated filename, the number of
// segments that follow (varint), and an address stored as raw bytes.
// Returns false as soon as any read fails.
bool
RecordReader::parseSegmentHeader(std::string* filename, size_t* num_segments, uintptr_t* addr)
{
    if (!d_input->getline(*filename, '\0')) {
        return false;
    }
    if (!readVarint(num_segments)) {
        return false;
    }
    return d_input->read(reinterpret_cast<char*>(addr), sizeof(*addr));
}

// Consumes the `num_segments` SEGMENT records that follow a segment header and,
// when stacks are tracked, registers them with the symbol resolver under
// `filename` and `addr`.
//
// The segment records are always read so the input stays in sync, but they are
// only collected when stacks are tracked. Returns false if a record type byte
// cannot be read, if it is not RecordType::SEGMENT, or if a segment fails to
// parse.
bool
RecordReader::processSegmentHeader(const std::string& filename, size_t num_segments, uintptr_t addr)
{
    // num_segments comes straight from the file. Cap the up-front reservation
    // so a corrupt count cannot request an enormous allocation; the vector
    // still grows as needed if the file really does hold more segments.
    constexpr size_t max_reserved_segments = 1024;

    std::vector<Segment> segments;
    if (d_track_stacks) {
        // Nothing is stored when stacks are not tracked, so only reserve here.
        segments.reserve(std::min(num_segments, max_reserved_segments));
    }

    for (size_t i = 0; i < num_segments; i++) {
        RecordType record_type;
        if (!d_input->read(reinterpret_cast<char*>(&record_type), sizeof(record_type))
            || (record_type != RecordType::SEGMENT))
        {
            return false;
        }

        Segment segment{};
        if (!parseSegment(&segment)) {
            return false;
        }
        if (d_track_stacks) {
            segments.emplace_back(segment);
        }
    }

    if (d_track_stacks) {
        std::lock_guard<std::mutex> lock(d_mutex);
        d_symbol_resolver.addSegments(filename, addr, segments);
    }
    return true;
}

// Parses one SEGMENT record body into `segment`: the virtual address as raw
// bytes followed by the memory size as a varint. Returns false if either read
// fails.
bool
RecordReader::parseSegment(Segment* segment)
{
    const bool vaddr_read =
            d_input->read(reinterpret_cast<char*>(&segment->vaddr), sizeof(segment->vaddr));
    return vaddr_read && readVarint(&segment->memsz);
}

// Parses a THREAD_RECORD body: reads a NUL-delimited string from the input into
// *name. Returns whether the read succeeded.
bool
RecordReader::parseThreadRecord(std::string* name)
{
    return d_input->getline(*name, '\0');
}

// Records `name` as the name of the thread selected by the most recent context
// switch (d_last.thread_id), replacing any previous name. Always returns true.
bool
RecordReader::processThreadRecord(const std::string& name)
{
    d_thread_names[d_last.thread_id] = name;
    return true;
}

// Parses a MEMORY_RECORD body: the RSS and a timestamp, both varints.
// The timestamp is stored relative to the start time in the header stats, so
// that start time is added back before returning. Returns false if either read
// fails.
bool
RecordReader::parseMemoryRecord(MemoryRecord* record)
{
    if (!readVarint(&record->rss)) {
        return false;
    }
    if (!readVarint(&record->ms_since_epoch)) {
        return false;
    }

    record->ms_since_epoch += d_header.stats.start_time;
    return true;
}

// Publishes `record` as the latest memory record. Always returns true.
bool
RecordReader::processMemoryRecord(const MemoryRecord& record)
{
    d_latest_memory_record = record;
    return true;
}

// Parses a CONTEXT_SWITCH body: the thread id, stored as sizeof(thread_id_t)
// raw bytes. Returns whether the read succeeded.
bool
RecordReader::parseContextSwitch(thread_id_t* tid)
{
    char* const raw_tid = reinterpret_cast<char*>(tid);
    return d_input->read(raw_tid, sizeof(thread_id_t));
}

// Makes `tid` the current thread: remembers it in d_last.thread_id and points
// d_curr_thread_stack at that thread's stack, creating an empty stack (with
// room reserved for 1024 entries) the first time the thread is seen.
// Always returns true.
bool
RecordReader::processContextSwitch(thread_id_t tid)
{
    d_last.thread_id = tid;

    const auto lookup = d_stack_traces.emplace(tid, stack_t{});
    auto& thread_stack = lookup.first->second;
    const bool first_time_seen = lookup.second;

    d_curr_thread_stack = &thread_stack;
    if (first_time_seen) {
        thread_stack.reserve(1024);
    }
    return true;
}

// Parses a MEMORY_SNAPSHOT body by reading sizeof(MemorySnapshot) raw bytes
// directly into *record. Returns whether the read succeeded.
bool
RecordReader::parseMemorySnapshotRecord(MemorySnapshot* record)
{
    char* const raw_record = reinterpret_cast<char*>(record);
    return d_input->read(raw_record, sizeof(MemorySnapshot));
}

// Publishes `record` as the latest memory snapshot. Always returns true.
bool
RecordReader::processMemorySnapshotRecord(const MemorySnapshot& record)
{
    d_latest_memory_snapshot = record;
    return true;
}

// Parses an AGGREGATED_ALLOCATION body by reading sizeof(AggregatedAllocation)
// raw bytes directly into *record. Returns whether the read succeeded.
bool
RecordReader::parseAggregatedAllocationRecord(AggregatedAllocation* record)
{
    char* const raw_record = reinterpret_cast<char*>(record);
    return d_input->read(raw_record, sizeof(AggregatedAllocation));
}

// Publishes `record` as the latest aggregated allocation. Always returns true.
bool
RecordReader::processAggregatedAllocationRecord(const AggregatedAllocation& record)
{
    d_latest_aggregated_allocation = record;
    return true;
}

// Parses a PYTHON_TRACE_INDEX body: two varints, read into record->first (the
// frame id) and then record->second (a frame tree index).
// Returns false if either read fails.
bool
RecordReader::parsePythonTraceIndexRecord(std::pair<frame_id_t, FrameTree::index_t>* record)
{
    if (!readVarint(&record->first)) {
        return false;
    }
    if (!readVarint(&record->second)) {
        return false;
    }
    return true;
}

// Feeds a parsed trace index record to the frame tree under d_mutex.
// Skipped entirely when stacks are not tracked. Always returns true.
bool
RecordReader::processPythonTraceIndexRecord(const std::pair<frame_id_t, FrameTree::index_t>& record)
{
    if (d_track_stacks) {
        std::lock_guard<std::mutex> guard(d_mutex);
        // The returned index is not needed; the call is made for its side effect.
        d_tree.getTraceIndex(record.second, record.first);
    }
    return true;
}

bool
RecordReader::parsePythonFrameIndexRecord(std::pair<frame_id_t, Frame>* pyframe_val)
{
    auto& [frame_id, frame] = *pyframe_val;
    if (!readVarint(&frame_id) || !readVarint(&frame.code_object_id)
        || !readSignedVarint(&frame.instruction_offset)
        || !d_input->read(reinterpret_cast<char*>(&frame.is_entry_frame), sizeof(frame.is_entry_frame)))
    {
        return false;
    }

    return true;
}

bool
RecordReader::processPythonFrameIndexRecord(const std::pair<frame_id_t, Frame>& record)
{
    if (!d_track_stacks) {
        return true;
    }
    std::lock_guard<std::mutex> lock(d_mutex);
    frame_id_t frame_id = d_python_frame_registry.registerRecord(record.second).first;
    if (frame_id != record.first) {
        throw std::runtime_error("Frame ID mismatch");
    }
    return true;
}

// Parses the body of an OBJECT_RECORD into `record`.
//
// Layout of `flags`:
//   bit 0:    set when the record describes an object creation
//   bits 1-4: index into the recent-address cache, or 0x0f for a cache miss
//
// On a cache miss the address is read delta-encoded, shifted left by 3, and
// inserted at the front of the cache. The native frame id is only present when
// the header says native traces were recorded and the object was created;
// otherwise it is set to 0.
//
// Returns false as soon as any read fails.
bool
RecordReader::parseObjectRecord(ObjectRecord* record, unsigned int flags)
{
    record->is_created = (flags & 0x01) > 0;

    unsigned int pointer_cache_index = (flags >> 1) & 0x0f;
    if (pointer_cache_index == 0x0f) {
        // Cache miss, read the pointer, then update the cache
        if (!readIntegralDelta(&d_last.data_pointer, &record->address)) {
            return false;
        }

        record->address <<= 3;

        // Shift every cached address one slot towards the back, dropping the
        // last one. The destination overlaps the source and starts inside it,
        // which std::move does not permit; std::move_backward is the algorithm
        // specified for this direction of overlap.
        std::move_backward(
                d_recent_addresses.begin(),
                d_recent_addresses.end() - 1,
                d_recent_addresses.end());
        d_recent_addresses[0] = record->address;
    } else {
        // Cache hit, reuse previous pointer
        record->address = d_recent_addresses[pointer_cache_index];
    }

    if (d_header.native_traces && record->is_created) {
        if (!readIntegralDelta(&d_last.native_frame_id, &record->native_frame_id)) {
            return false;
        }
    } else {
        record->native_frame_id = 0;
    }
    return true;
}

// Publishes a parsed object record as d_latest_object.
//
// The thread id comes from the most recent context switch. Stack information
// (native frame id, Python frame index, native segment generation) is only
// filled in when stacks are tracked and the record is a creation; otherwise
// those fields are zeroed. Always returns true.
bool
RecordReader::processObjectRecord(const ObjectRecord& record)
{
    d_latest_object.tid = d_last.thread_id;
    d_latest_object.address = record.address;

    // native_frame_id is assigned on both branches below, so it is not also
    // set up front.
    if (d_track_stacks && record.is_created) {
        d_latest_object.native_frame_id = record.native_frame_id;
        // operator[] is used for the lookup, as in the code this replaces.
        auto& stack = d_stack_traces[d_latest_object.tid];
        d_latest_object.frame_index = stack.empty() ? 0 : stack.back();
        d_latest_object.native_segment_generation = d_symbol_resolver.currentSegmentGeneration();
    } else {
        d_latest_object.native_frame_id = 0;
        d_latest_object.frame_index = 0;
        d_latest_object.native_segment_generation = 0;
    }

    d_latest_object.is_created = record.is_created;
    return true;
}

// Parses a SURVIVING_OBJECT body into `record`.
//
// The record is always marked as a creation. The address is read as a plain
// varint and shifted left by 3. The native frame id is read as a plain varint
// when the header says native traces were recorded, and set to 0 otherwise.
// Returns false as soon as any read fails.
bool
RecordReader::parseSurvivingObjectRecord(ObjectRecord* record)
{
    // Surviving objects are always created
    record->is_created = true;

    if (!readVarint(reinterpret_cast<uintptr_t*>(&record->address))) {
        return false;
    }
    record->address <<= 3;

    if (!d_header.native_traces) {
        record->native_frame_id = 0;
        return true;
    }

    if (!readVarint(&record->native_frame_id)) {
        return false;
    }
    return true;
}

// Surviving object records are handled exactly like ordinary object records:
// this simply forwards to processObjectRecord().
bool
RecordReader::processSurvivingObjectRecord(const ObjectRecord& record)
{
    return processObjectRecord(record);
}

// Reads forward to the next record of interest, dispatching on the file format
// stored in the header. Logs an error and returns RecordResult::ERROR when the
// header holds a format this reader does not know.
RecordReader::RecordResult
RecordReader::nextRecord()
{
    switch (d_header.file_format) {
        case FileFormat::ALL_ALLOCATIONS: {
            const RecordReader::RecordResult ret = nextRecordFromAllAllocationsFile();
            // These results belong to the aggregated format only.
            assert(ret != RecordResult::MEMORY_SNAPSHOT);
            assert(ret != RecordResult::AGGREGATED_ALLOCATION_RECORD);
            return ret;
        }
        case FileFormat::AGGREGATED_ALLOCATIONS: {
            const RecordReader::RecordResult ret = nextRecordFromAggregatedAllocationsFile();
            // These results belong to the all-allocations format only.
            assert(ret != RecordResult::MEMORY_RECORD);
            assert(ret != RecordResult::ALLOCATION_RECORD);
            return ret;
        }
        default:
            break;
    }

    LOG(ERROR) << "Invalid file format enumerator";
    return RecordResult::ERROR;
}

// Resolves a frame id to a Location (function name, filename, line number),
// caching the result in d_python_location_by_frame_id.
//
// The line number stays 0 unless the code object has a non-empty line table,
// the frame's instruction offset is non-negative, and compat::parseLinetable
// reports success.
Location
RecordReader::frameToLocation(frame_id_t frame_id)
{
    const auto cached = d_python_location_by_frame_id.find(frame_id);
    if (cached != d_python_location_by_frame_id.end()) {
        return cached->second;
    }

    auto& frame = d_python_frame_registry.getRecord(frame_id);
    auto& code = d_code_object_map[frame.code_object_id];

    int lineno = 0;
    const bool can_resolve_line = !code.linetable.empty() && frame.instruction_offset >= 0;
    if (can_resolve_line) {
        compat::LocationInfo info;
        const bool parsed = compat::parseLinetable(
                d_header.python_version,
                code.linetable,
                frame.instruction_offset,
                code.firstlineno,
                &info);
        if (parsed) {
            lineno = info.lineno;
        }
    }

    const auto inserted = d_python_location_by_frame_id.emplace(
            frame_id,
            Location{code.function_name, code.filename, lineno});
    return inserted.first->second;
}

void
RecordReader::extractRecordTypeAndFlags(
        unsigned char record_type_and_flags,
        RecordType* record_type,
        unsigned char* flags) const
{
    unsigned char flags_mask;
    if (record_type_and_flags & static_cast<unsigned char>(RecordType::ALLOCATION)) {
        *record_type = RecordType::ALLOCATION;
        flags_mask = static_cast<unsigned char>(RecordType::ALLOCATION) - 1;
    } else if (record_type_and_flags & static_cast<unsigned char>(RecordType::FRAME_PUSH)) {
        *record_type = RecordType::FRAME_PUSH;
        flags_mask = static_cast<unsigned char>(RecordType::FRAME_PUSH) - 1;
    } else if (record_type_and_flags & static_cast<unsigned char>(RecordType::OBJECT_RECORD)) {
        *record_type = RecordType::OBJECT_RECORD;
        flags_mask = static_cast<unsigned char>(RecordType::OBJECT_RECORD) - 1;
    } else if (record_type_and_flags & static_cast<unsigned char>(RecordType::FRAME_POP)) {
        *record_type = RecordType::FRAME_POP;
        flags_mask = static_cast<unsigned char>(RecordType::FRAME_POP) - 1;
    } else {
        *record_type = static_cast<RecordType>(record_type_and_flags);
        flags_mask = 0;
    }
    *flags = record_type_and_flags & flags_mask;
}

// Reads records from an all-allocations file until one is found that the
// caller needs to see.
//
// Allocation and memory records are returned as soon as they are processed;
// object records are returned only when object lifetimes are tracked. All
// other record types update internal state and reading continues. Returns
// END_OF_FILE when the next record header cannot be read or a TRAILER is hit,
// and ERROR (logged only while the input is still open) when a record fails to
// parse or process, or has an unknown type.
RecordReader::RecordResult
RecordReader::nextRecordFromAllAllocationsFile()
{
    // Logs `message` if the input is still open, and yields the ERROR result.
    const auto fail = [this](const char* message) {
        if (d_input->is_open()) {
            LOG(ERROR) << message;
        }
        return RecordResult::ERROR;
    };

    for (;;) {
        unsigned char header_byte;
        if (!d_input->read(reinterpret_cast<char*>(&header_byte), sizeof(header_byte))) {
            return RecordResult::END_OF_FILE;
        }

        RecordType record_type;
        unsigned char flags;
        extractRecordTypeAndFlags(header_byte, &record_type, &flags);

        switch (record_type) {
            case RecordType::TRAILER: {
                return RecordResult::END_OF_FILE;
            }
            case RecordType::ALLOCATION: {
                AllocationRecord record;
                if (!parseAllocationRecord(&record, flags) || !processAllocationRecord(record)) {
                    return fail("Failed to process allocation record");
                }
                return RecordResult::ALLOCATION_RECORD;
            }
            case RecordType::MEMORY_RECORD: {
                MemoryRecord record;
                if (!parseMemoryRecord(&record) || !processMemoryRecord(record)) {
                    return fail("Failed to process memory record");
                }
                return RecordResult::MEMORY_RECORD;
            }
            case RecordType::CONTEXT_SWITCH: {
                thread_id_t tid;
                if (!parseContextSwitch(&tid) || !processContextSwitch(tid)) {
                    return fail("Failed to process context switch record");
                }
                break;
            }
            case RecordType::FRAME_PUSH: {
                FramePush record;
                if (!parseFramePush(&record, flags) || !processFramePush(record)) {
                    return fail("Failed to process frame push");
                }
                break;
            }
            case RecordType::FRAME_POP: {
                FramePop record;
                if (!parseFramePop(&record, flags) || !processFramePop(record)) {
                    return fail("Failed to process frame pop");
                }
                break;
            }
            case RecordType::CODE_OBJECT: {
                tracking_api::pycode_map_val_t record;
                if (!parseCodeObjectRecord(&record) || !processCodeObjectRecord(record)) {
                    return fail("Failed to process code object");
                }
                break;
            }
            case RecordType::NATIVE_TRACE_INDEX: {
                UnresolvedNativeFrame record;
                if (!parseNativeFrameIndex(&record) || !processNativeFrameIndex(record)) {
                    return fail("Failed to process native frame index");
                }
                break;
            }
            case RecordType::MEMORY_MAP_START: {
                if (!parseMemoryMapStart() || !processMemoryMapStart()) {
                    return fail("Failed to process memory map start");
                }
                break;
            }
            case RecordType::SEGMENT_HEADER: {
                std::string filename;
                size_t num_segments;
                uintptr_t addr;
                if (!parseSegmentHeader(&filename, &num_segments, &addr)
                    || !processSegmentHeader(filename, num_segments, addr))
                {
                    return fail("Failed to process segment header");
                }
                break;
            }
            case RecordType::THREAD_RECORD: {
                std::string name;
                if (!parseThreadRecord(&name) || !processThreadRecord(name)) {
                    return fail("Failed to process thread record");
                }
                break;
            }
            case RecordType::OBJECT_RECORD: {
                ObjectRecord record;
                if (!parseObjectRecord(&record, flags) || !processObjectRecord(record)) {
                    return fail("Failed to process native object record");
                }
                // Only surface object records to callers that asked for them.
                if (d_track_object_lifetimes) {
                    return RecordResult::OBJECT_RECORD;
                }
                break;
            }
            default:
                return fail("Invalid record type");
        }
    }
}

// Reads records from an aggregated-allocations file until one is found that the
// caller needs to see.
//
// Memory snapshots and aggregated allocations are returned as soon as they are
// processed; surviving object records are returned only when object lifetimes
// are tracked. All other record types update internal state and reading
// continues. Returns END_OF_FILE when the next record type cannot be read or an
// AGGREGATED_TRAILER is hit, and ERROR when a record fails to parse or process,
// is an unexpected stand-alone SEGMENT, or has an unknown type.
//
// Every ERROR return is logged, but only while the input is still open.
RecordReader::RecordResult
RecordReader::nextRecordFromAggregatedAllocationsFile()
{
    // Logs `message` if the input is still open, and yields the ERROR result.
    const auto fail = [this](const char* message) {
        if (d_input->is_open()) {
            LOG(ERROR) << message;
        }
        return RecordResult::ERROR;
    };

    while (true) {
        AggregatedRecordType record_type;
        if (!d_input->read(reinterpret_cast<char*>(&record_type), sizeof(record_type))) {
            return RecordResult::END_OF_FILE;
        }

        switch (record_type) {
            case AggregatedRecordType::MEMORY_SNAPSHOT: {
                MemorySnapshot record;
                if (!parseMemorySnapshotRecord(&record) || !processMemorySnapshotRecord(record)) {
                    return fail("Failed to process memory snapshot record");
                }
                return RecordResult::MEMORY_SNAPSHOT;
            }

            case AggregatedRecordType::AGGREGATED_ALLOCATION: {
                AggregatedAllocation record;
                if (!parseAggregatedAllocationRecord(&record)
                    || !processAggregatedAllocationRecord(record))
                {
                    return fail("Failed to process aggregated allocation record");
                }
                return RecordResult::AGGREGATED_ALLOCATION_RECORD;
            }

            case AggregatedRecordType::PYTHON_TRACE_INDEX: {
                std::pair<frame_id_t, FrameTree::index_t> record;
                if (!parsePythonTraceIndexRecord(&record) || !processPythonTraceIndexRecord(record)) {
                    return fail("Failed to process python trace index record");
                }
            } break;

            case AggregatedRecordType::PYTHON_FRAME_INDEX: {
                std::pair<frame_id_t, Frame> record;
                if (!parsePythonFrameIndexRecord(&record) || !processPythonFrameIndexRecord(record)) {
                    return fail("Failed to process python frame index record");
                }
            } break;

            case AggregatedRecordType::NATIVE_TRACE_INDEX: {
                UnresolvedNativeFrame record;
                if (!parseNativeFrameIndex(&record) || !processNativeFrameIndex(record)) {
                    return fail("Failed to process native frame index");
                }
            } break;

            case AggregatedRecordType::MEMORY_MAP_START: {
                if (!parseMemoryMapStart() || !processMemoryMapStart()) {
                    return fail("Failed to process memory map start");
                }
            } break;

            case AggregatedRecordType::SEGMENT_HEADER: {
                std::string filename;
                size_t num_segments;
                uintptr_t addr;
                if (!parseSegmentHeader(&filename, &num_segments, &addr)
                    || !processSegmentHeader(filename, num_segments, addr))
                {
                    return fail("Failed to process segment header");
                }
            } break;

            case AggregatedRecordType::SEGMENT: {
                // These should always be consumed by processSegmentHeader
                return fail("Unexpected SEGMENT record");
            }

            case AggregatedRecordType::THREAD_RECORD: {
                std::string name;
                if (!parseThreadRecord(&name) || !processThreadRecord(name)) {
                    return fail("Failed to process thread record");
                }
            } break;

            case AggregatedRecordType::CONTEXT_SWITCH: {
                thread_id_t tid;
                if (!parseContextSwitch(&tid) || !processContextSwitch(tid)) {
                    return fail("Failed to process context switch record");
                }
            } break;

            case AggregatedRecordType::SURVIVING_OBJECT: {
                ObjectRecord record;
                if (!parseSurvivingObjectRecord(&record) || !processSurvivingObjectRecord(record)) {
                    return fail("Failed to process surviving object record");
                }
                // Only surface object records to callers that asked for them.
                if (!d_track_object_lifetimes) {
                    break;
                }
                return RecordResult::OBJECT_RECORD;
            }

            case AggregatedRecordType::CODE_OBJECT: {
                tracking_api::pycode_map_val_t record;
                if (!parseCodeObjectRecord(&record) || !processCodeObjectRecord(record)) {
                    return fail("Failed to process code object");
                }
            } break;

            case AggregatedRecordType::AGGREGATED_TRAILER: {
                return RecordResult::END_OF_FILE;
            }

            default: {
                return fail("Invalid record type");
            }
        }
    }
}

// Python public APIs

// Build the Python stack for the frame tree node `index`.
//
// Forwards to Py_GetStackFrameAndEntryInfo() with a null `is_entry_frame`
// vector, so no per-frame entry information is collected. The return value is
// whatever that function returns for the same `index` and `max_stacks`.
PyObject*
RecordReader::Py_GetStackFrame(FrameTree::index_t index, size_t max_stacks)
{
    return Py_GetStackFrameAndEntryInfo(index, nullptr, max_stacks);
}

// Build a Python list describing the Python stack that starts at frame tree
// node `index`.
//
// Starting at `index`, each node's frame is converted to a Location and then to
// a Python object that is appended to the list; the walk then moves to the next
// index reported by the tree. It stops when index 0 is reached or once
// `max_stacks` frames have been visited.
//
// If `is_entry_frame` is non-null it is cleared up front and receives one
// element per appended frame, copied from the frame record's `is_entry_frame`.
//
// Returns a new list on success. Returns nullptr with RuntimeError set when
// stack tracking is disabled, and nullptr when list creation, object
// conversion, or appending fails (the partially built list is released).
PyObject*
RecordReader::Py_GetStackFrameAndEntryInfo(
        FrameTree::index_t index,
        std::vector<unsigned char>* is_entry_frame,
        size_t max_stacks)
{
    if (!d_track_stacks) {
        PyErr_SetString(PyExc_RuntimeError, "Stack tracking is disabled");
        return nullptr;
    }

    const bool collect_entry_flags = (is_entry_frame != nullptr);
    if (collect_entry_flags) {
        is_entry_frame->clear();
        is_entry_frame->reserve(64);
    }

    std::lock_guard<std::mutex> guard(d_mutex);

    PyObject* result = PyList_New(0);
    if (!result) {
        return nullptr;
    }

    FrameTree::index_t node_index = index;
    for (size_t visited = 0; node_index != 0 && visited != max_stacks; ++visited) {
        auto [python_frame_id, following_index] = d_tree.nextNode(node_index);
        const auto& frame_record = d_python_frame_registry.getRecord(python_frame_id);
        Location location = frameToLocation(python_frame_id);

        PyObject* py_location = location.toPythonObject(d_pystring_cache);
        if (!py_location) {
            Py_DECREF(result);
            return nullptr;
        }

        const int append_status = PyList_Append(result, py_location);
        // PyList_Append does not steal our reference, so drop it either way.
        Py_DECREF(py_location);
        if (append_status != 0) {
            Py_DECREF(result);
            return nullptr;
        }

        if (collect_entry_flags) {
            is_entry_frame->push_back(frame_record.is_entry_frame);
        }
        node_index = following_index;
    }
    return result;
}

// Build a Python list describing the native stack that starts at `index`.
//
// `index` is a 1-based position into d_native_frames; 0 terminates the walk.
// Each visited entry's instruction pointer is resolved through
// d_symbol_resolver for the given `generation`, and every resolved frame is
// converted to a Python object and appended to the list. Entries that cannot be
// resolved are skipped, but they still count towards `max_stacks`.
//
// Returns a new list on success. Returns nullptr with RuntimeError set when
// stack tracking is disabled, and nullptr when list creation, object
// conversion, or appending fails. On every failure after the list has been
// created, the partially built list is released before returning.
PyObject*
RecordReader::Py_GetNativeStackFrame(FrameTree::index_t index, size_t generation, size_t max_stacks)
{
    if (!d_track_stacks) {
        PyErr_SetString(PyExc_RuntimeError, "Stack tracking is disabled");
        return nullptr;
    }
    std::lock_guard<std::mutex> lock(d_mutex);

    size_t stacks_obtained = 0;
    FrameTree::index_t current_index = index;
    PyObject* list = PyList_New(0);
    if (list == nullptr) {
        return nullptr;
    }

    while (current_index != 0 && stacks_obtained++ != max_stacks) {
        auto frame = d_native_frames[current_index - 1];
        // Advance before resolving so that unresolvable entries are skipped
        // without stalling the walk.
        current_index = frame.index;
        auto resolved_frames = d_symbol_resolver.resolve(frame.ip, generation);
        if (!resolved_frames) {
            continue;
        }
        for (auto& native_frame : resolved_frames->frames()) {
            PyObject* pyframe = native_frame.toPythonObject(d_pystring_cache);
            if (pyframe == nullptr) {
                // Must not return directly here: `list` would be leaked.
                goto error;
            }
            int ret = PyList_Append(list, pyframe);
            // PyList_Append does not steal our reference, so drop it either way.
            Py_DECREF(pyframe);
            if (ret != 0) {
                goto error;
            }
        }
    }
    return list;
error:
    Py_XDECREF(list);
    return nullptr;
}

std::optional<location_id_t>
RecordReader::getLatestPythonLocationId(const Allocation& allocation)
{
    if (!d_track_stacks) {
        throw std::runtime_error("Stack tracking is disabled");
    }

    if (0 == allocation.frame_index) {
        return {};
    }
    std::unique_lock<std::mutex> lock(d_mutex);
    auto frame_id = d_tree.nextNode(allocation.frame_index).first;
    Location location = frameToLocation(frame_id);
    return d_location_registry.registerRecord(location).first;
}

// Convert a registered location id into its Python object representation.
//
// Returns nullptr with RuntimeError set when stack tracking is disabled, and
// None when `location_id` is empty. Otherwise, under d_mutex, fetches the
// record from d_location_registry and returns the result of its
// toPythonObject() conversion.
PyObject*
RecordReader::Py_GetLocation(std::optional<location_id_t> location_id)
{
    if (!d_track_stacks) {
        PyErr_SetString(PyExc_RuntimeError, "Stack tracking is disabled");
        return nullptr;
    }

    if (!location_id.has_value()) {
        Py_RETURN_NONE;
    }

    std::lock_guard<std::mutex> guard(d_mutex);
    auto&& location = d_location_registry.getRecord(*location_id);
    return location.toPythonObject(d_pystring_cache);
}

// Return a copy of the header record stored in d_header.
HeaderRecord
RecordReader::getHeader() const noexcept
{
    return d_header;
}

// Return the `main_tid` field of the stored header.
thread_id_t
RecordReader::getMainThreadTid() const noexcept
{
    return d_header.main_tid;
}

// Return the `skipped_frames_on_main_tid` field of the stored header.
size_t
RecordReader::getSkippedFramesOnMainThread() const noexcept
{
    return d_header.skipped_frames_on_main_tid;
}

// Look up the name recorded for thread `tid` in d_thread_names.
//
// Returns a copy of the stored name, or an empty string when the thread id has
// no entry.
std::string
RecordReader::getThreadName(thread_id_t tid)
{
    const auto entry = d_thread_names.find(tid);
    return entry == d_thread_names.end() ? std::string() : entry->second;
}

// Return a copy of d_latest_allocation.
Allocation
RecordReader::getLatestAllocation() const noexcept
{
    return d_latest_allocation;
}

// Return a copy of d_latest_memory_record.
MemoryRecord
RecordReader::getLatestMemoryRecord() const noexcept
{
    return d_latest_memory_record;
}

// Return a copy of d_latest_aggregated_allocation.
AggregatedAllocation
RecordReader::getLatestAggregatedAllocation() const noexcept
{
    return d_latest_aggregated_allocation;
}

// Return a copy of d_latest_memory_snapshot.
MemorySnapshot
RecordReader::getLatestMemorySnapshot() const noexcept
{
    return d_latest_memory_snapshot;
}

// Return a copy of d_latest_object.
TrackedObject
RecordReader::getLatestObject() const noexcept
{
    return d_latest_object;
}

// Print a human-readable dump of the capture to stdout.
//
// First prints a single "HEADER ..." line built from the fields of d_header,
// then delegates to the dumper matching d_header.file_format and returns its
// result. If the file format is not one of the known values, prints
// "UNRECOGNIZED FILE CONTENTS" and returns None.
PyObject*
RecordReader::dumpAllRecords()
{
    // Left empty if the header carries a value outside the enumerators below.
    std::string python_allocator;
    switch (d_header.python_allocator) {
        case PythonAllocatorType::PYTHONALLOCATOR_PYMALLOC:
            python_allocator = "pymalloc";
            break;
        case PythonAllocatorType::PYTHONALLOCATOR_PYMALLOC_DEBUG:
            python_allocator = "pymalloc debug";
            break;
        case PythonAllocatorType::PYTHONALLOCATOR_MIMALLOC:
            python_allocator = "mimalloc";
            break;
        case PythonAllocatorType::PYTHONALLOCATOR_MIMALLOC_DEBUG:
            python_allocator = "mimalloc debug";
            break;
        case PythonAllocatorType::PYTHONALLOCATOR_MALLOC:
            // Must be distinguishable from PYTHONALLOCATOR_PYMALLOC in the dump.
            python_allocator = "malloc";
            break;
        case PythonAllocatorType::PYTHONALLOCATOR_OTHER:
            python_allocator = "other";
            break;
    }
    std::string file_format;
    switch (d_header.file_format) {
        case FileFormat::ALL_ALLOCATIONS: {
            file_format = "ALL_ALLOCATIONS";
        } break;
        case FileFormat::AGGREGATED_ALLOCATIONS: {
            file_format = "AGGREGATED_ALLOCATIONS";
        } break;
        default: {
            file_format = "<unknown enum value " + std::to_string((int)d_header.file_format) + ">";
        } break;
    }
    // The magic is printed with an explicit precision ("%.*s") bounded by the
    // size of the array, so it does not rely on a terminating NUL.
    printf("HEADER magic=%.*s version=%d python_version=%08x native_traces=%s file_format=%s"
           " n_allocations=%zd n_frames=%zd start_time=%lld end_time=%lld"
           " pid=%d main_tid=%lu skipped_frames_on_main_tid=%zd"
           " command_line=%s python_allocator=%s trace_python_allocators=%s"
           " track_object_lifetimes=%s\n",
           (int)sizeof(d_header.magic),
           d_header.magic,
           d_header.version,
           d_header.python_version,
           d_header.native_traces ? "true" : "false",
           file_format.c_str(),
           d_header.stats.n_allocations,
           d_header.stats.n_frames,
           d_header.stats.start_time,
           d_header.stats.end_time,
           d_header.pid,
           d_header.main_tid,
           d_header.skipped_frames_on_main_tid,
           d_header.command_line.c_str(),
           python_allocator.c_str(),
           d_header.trace_python_allocators ? "true" : "false",
           d_header.track_object_lifetimes ? "true" : "false");

    switch (d_header.file_format) {
        case FileFormat::ALL_ALLOCATIONS:
            return dumpAllRecordsFromAllAllocationsFile();
        case FileFormat::AGGREGATED_ALLOCATIONS:
            return dumpAllRecordsFromAggregatedAllocationsFile();
        default:
            printf("UNRECOGNIZED FILE CONTENTS\n");
            Py_RETURN_NONE;
    }
}

// Dump every record of an ALL_ALLOCATIONS file to stdout, one line per record.
//
// Repeatedly reads a one-byte record header from d_input, splits it into a
// record type and flags, prints the record's tag, parses the record body with
// the matching parse* helper, and then prints the parsed fields.
//
// Returns None when the input can no longer be read, when a record body fails
// to parse, when a TRAILER record is seen, or when the record type is unknown.
// Returns nullptr if PyErr_CheckSignals() reports a nonzero result.
//
// Note that the tag is printed before the body is parsed, so a parse failure
// leaves a tag on stdout without its fields or a trailing newline.
PyObject*
RecordReader::dumpAllRecordsFromAllAllocationsFile()
{
    while (true) {
        // Checked once per record so the dump can be interrupted.
        if (0 != PyErr_CheckSignals()) {
            return nullptr;
        }

        unsigned char record_type_and_flags;
        if (!d_input->read(
                    reinterpret_cast<char*>(&record_type_and_flags),
                    sizeof(record_type_and_flags)))
        {
            Py_RETURN_NONE;
        }

        RecordType record_type;
        unsigned char flags;
        extractRecordTypeAndFlags(record_type_and_flags, &record_type, &flags);

        switch (record_type) {
            case RecordType::TRAILER: {
                printf("TRAILER\n");
                Py_RETURN_NONE;  // Treat as EOF
            } break;
            case RecordType::ALLOCATION: {
                printf("ALLOCATION ");

                AllocationRecord record;
                if (!parseAllocationRecord(&record, flags)) {
                    Py_RETURN_NONE;
                }

                const char* allocator = allocatorName(record.allocator);

                // Owns the fallback text so `allocator` stays valid for the printf.
                std::string unknownAllocator;
                if (!allocator) {
                    unknownAllocator =
                            "<unknown allocator " + std::to_string((int)record.allocator) + ">";
                    allocator = unknownAllocator.c_str();
                }
                printf("address=%p size=%zd allocator=%s native_frame_id=%zd\n",
                       (void*)record.address,
                       record.size,
                       allocator,
                       record.native_frame_id);
            } break;
            case RecordType::FRAME_PUSH: {
                printf("FRAME_PUSH ");

                FramePush record;
                if (!parseFramePush(&record, flags)) {
                    Py_RETURN_NONE;
                }

                printf("code_object_id=%zd instruction_offset=%d is_entry_frame=%d\n",
                       record.frame.code_object_id,
                       record.frame.instruction_offset,
                       record.frame.is_entry_frame);
            } break;
            case RecordType::FRAME_POP: {
                printf("FRAME_POP ");

                FramePop record;
                if (!parseFramePop(&record, flags)) {
                    Py_RETURN_NONE;
                }

                printf("count=%zd\n", record.count);
            } break;
            case RecordType::CODE_OBJECT: {
                printf("CODE_OBJECT ");
                tracking_api::pycode_map_val_t record;
                if (!parseCodeObjectRecord(&record)) {
                    Py_RETURN_NONE;
                }
                printf("code_id=%zd function_name=%s filename=%s firstlineno=%d linetable_size=%zd\n",
                       record.first,
                       record.second.function_name.c_str(),
                       record.second.filename.c_str(),
                       record.second.firstlineno,
                       record.second.linetable.size());
            } break;
            case RecordType::NATIVE_TRACE_INDEX: {
                printf("NATIVE_FRAME_ID ");

                UnresolvedNativeFrame record;
                if (!parseNativeFrameIndex(&record)) {
                    Py_RETURN_NONE;
                }

                printf("ip=%p index=%zu\n", (void*)record.ip, record.index);
            } break;
            case RecordType::MEMORY_MAP_START: {
                printf("MEMORY_MAP_START\n");
                if (!parseMemoryMapStart()) {
                    Py_RETURN_NONE;
                }
            } break;
            case RecordType::SEGMENT_HEADER: {
                printf("SEGMENT_HEADER ");

                std::string filename;
                size_t num_segments;
                uintptr_t addr;
                if (!parseSegmentHeader(&filename, &num_segments, &addr)) {
                    Py_RETURN_NONE;
                }

                printf("filename=%s num_segments=%zd addr=%p\n",
                       filename.c_str(),
                       num_segments,
                       (void*)addr);
            } break;
            case RecordType::SEGMENT: {
                printf("SEGMENT ");

                Segment record;
                if (!parseSegment(&record)) {
                    Py_RETURN_NONE;
                }

                printf("%p %" PRIxPTR "\n", (void*)record.vaddr, record.memsz);
            } break;
            case RecordType::THREAD_RECORD: {
                printf("THREAD ");

                std::string name;
                if (!parseThreadRecord(&name)) {
                    Py_RETURN_NONE;
                }

                printf("%s\n", name.c_str());
            } break;
            case RecordType::MEMORY_RECORD: {
                printf("MEMORY_RECORD ");

                MemoryRecord record;
                if (!parseMemoryRecord(&record)) {
                    Py_RETURN_NONE;
                }

                printf("time=%" PRIu64 " memory=%zd\n", record.ms_since_epoch, record.rss);
            } break;
            case RecordType::CONTEXT_SWITCH: {
                printf("CONTEXT_SWITCH ");

                thread_id_t tid;
                if (!parseContextSwitch(&tid)) {
                    Py_RETURN_NONE;
                }

                printf("tid=%lu\n", tid);
            } break;
            case RecordType::OBJECT_RECORD: {
                printf("OBJECT_RECORD ");

                ObjectRecord record;
                if (!parseObjectRecord(&record, flags)) {
                    Py_RETURN_NONE;
                }

                printf("address=%p native_frame_id=%zd\n",
                       (void*)record.address,
                       record.native_frame_id);
            } break;
            default: {
                printf("UNKNOWN RECORD TYPE %d\n", (int)record_type);
                Py_RETURN_NONE;
            } break;
        }
    }
}

// Dump every record of an AGGREGATED_ALLOCATIONS file to stdout, one line per
// record.
//
// Repeatedly reads a record type from d_input, prints the record's tag, parses
// the record body with the matching parse* helper, and then prints the parsed
// fields.
//
// Returns None when the input can no longer be read, when a record body fails
// to parse, when an AGGREGATED_TRAILER record is seen, or when the record type
// is unknown. Returns nullptr if PyErr_CheckSignals() reports a nonzero result.
//
// Note that the tag is printed before the body is parsed, so a parse failure
// leaves a tag on stdout without its fields or a trailing newline.
PyObject*
RecordReader::dumpAllRecordsFromAggregatedAllocationsFile()
{
    while (true) {
        // Checked once per record so the dump can be interrupted.
        if (0 != PyErr_CheckSignals()) {
            return nullptr;
        }

        AggregatedRecordType record_type;
        if (!d_input->read(reinterpret_cast<char*>(&record_type), sizeof(record_type))) {
            Py_RETURN_NONE;
        }

        switch (record_type) {
            case AggregatedRecordType::MEMORY_SNAPSHOT: {
                printf("MEMORY_SNAPSHOT ");

                MemorySnapshot record;
                if (!parseMemorySnapshotRecord(&record)) {
                    Py_RETURN_NONE;
                }

                printf("time=%" PRIu64 " rss=%zd heap=%zd\n",
                       record.ms_since_epoch,
                       record.rss,
                       record.heap);
            } break;

            case AggregatedRecordType::AGGREGATED_ALLOCATION: {
                printf("AGGREGATED_ALLOCATION ");

                AggregatedAllocation record;
                if (!parseAggregatedAllocationRecord(&record)) {
                    Py_RETURN_NONE;
                }

                const char* allocator = allocatorName(record.allocator);

                // Owns the fallback text so `allocator` stays valid for the printf.
                std::string unknownAllocator;
                if (!allocator) {
                    unknownAllocator =
                            "<unknown allocator " + std::to_string((int)record.allocator) + ">";
                    allocator = unknownAllocator.c_str();
                }

                printf("tid=%lu allocator=%s native_frame_id=%zd python_frame_id=%zd"
                       " native_segment_generation=%zd n_allocations_in_high_water_mark=%zd"
                       " n_allocations_leaked=%zd bytes_in_high_water_mark=%zd bytes_leaked=%zd\n",
                       record.tid,
                       allocator,
                       record.native_frame_id,
                       record.frame_index,
                       record.native_segment_generation,
                       record.n_allocations_in_high_water_mark,
                       record.n_allocations_leaked,
                       record.bytes_in_high_water_mark,
                       record.bytes_leaked);
            } break;

            case AggregatedRecordType::PYTHON_TRACE_INDEX: {
                printf("PYTHON_TRACE_INDEX ");

                std::pair<frame_id_t, FrameTree::index_t> record;
                if (!parsePythonTraceIndexRecord(&record)) {
                    Py_RETURN_NONE;
                }

                printf("frame_id=%zd parent_index=%zd\n", record.first, record.second);
            } break;

            case AggregatedRecordType::PYTHON_FRAME_INDEX: {
                printf("PYTHON_FRAME_INDEX ");

                std::pair<frame_id_t, Frame> record;
                if (!parsePythonFrameIndexRecord(&record)) {
                    Py_RETURN_NONE;
                }

                printf("frame_id=%zd code_object_id=%zd instruction_offset=%d is_entry_frame=%d\n",
                       record.first,
                       record.second.code_object_id,
                       record.second.instruction_offset,
                       record.second.is_entry_frame);
            } break;

            case AggregatedRecordType::NATIVE_TRACE_INDEX: {
                printf("NATIVE_TRACE_INDEX ");

                UnresolvedNativeFrame record;
                if (!parseNativeFrameIndex(&record)) {
                    Py_RETURN_NONE;
                }

                printf("ip=%p index=%zu\n", (void*)record.ip, record.index);
            } break;

            case AggregatedRecordType::MEMORY_MAP_START: {
                printf("MEMORY_MAP_START\n");
                if (!parseMemoryMapStart()) {
                    Py_RETURN_NONE;
                }
            } break;

            case AggregatedRecordType::SEGMENT_HEADER: {
                printf("SEGMENT_HEADER ");

                std::string filename;
                size_t num_segments;
                uintptr_t addr;
                if (!parseSegmentHeader(&filename, &num_segments, &addr)) {
                    Py_RETURN_NONE;
                }

                printf("filename=%s num_segments=%zd addr=%p\n",
                       filename.c_str(),
                       num_segments,
                       (void*)addr);
            } break;

            case AggregatedRecordType::SEGMENT: {
                printf("SEGMENT ");

                Segment record;
                if (!parseSegment(&record)) {
                    Py_RETURN_NONE;
                }

                printf("%p %" PRIxPTR "\n", (void*)record.vaddr, record.memsz);
            } break;

            case AggregatedRecordType::THREAD_RECORD: {
                printf("THREAD_RECORD ");

                std::string name;
                if (!parseThreadRecord(&name)) {
                    Py_RETURN_NONE;
                }

                printf("%s\n", name.c_str());
            } break;

            case AggregatedRecordType::CONTEXT_SWITCH: {
                printf("CONTEXT_SWITCH ");

                thread_id_t tid;
                if (!parseContextSwitch(&tid)) {
                    Py_RETURN_NONE;
                }

                printf("tid=%lu\n", tid);
            } break;

            case AggregatedRecordType::SURVIVING_OBJECT: {
                printf("SURVIVING_OBJECT ");
                ObjectRecord record;
                if (!parseSurvivingObjectRecord(&record)) {
                    Py_RETURN_NONE;
                }
                // Bare "%p", as for every other address in the dump: a literal
                // "0x" here would double the prefix wherever "%p" already
                // prints one.
                printf("address=%p native_frame_id=%zu\n",
                       (void*)record.address,
                       record.native_frame_id);
            } break;

            case AggregatedRecordType::CODE_OBJECT: {
                printf("CODE_OBJECT ");
                tracking_api::pycode_map_val_t record;
                if (!parseCodeObjectRecord(&record)) {
                    Py_RETURN_NONE;
                }
                printf("code_id=%zd function_name=%s filename=%s firstlineno=%d linetable_size=%zd\n",
                       record.first,
                       record.second.function_name.c_str(),
                       record.second.filename.c_str(),
                       record.second.firstlineno,
                       record.second.linetable.size());
            } break;

            case AggregatedRecordType::AGGREGATED_TRAILER: {
                printf("AGGREGATED_TRAILER\n");
                Py_RETURN_NONE;  // Treat as EOF
            } break;

            default: {
                printf("UNKNOWN RECORD TYPE %d\n", (int)record_type);
                Py_RETURN_NONE;
            } break;
        }
    }
}

}  // namespace memray::api
